/* This code compiles HEGIS college data into county-level data, following Kling (2001). The original code and data can be found in the data archive of the paper. 
The code is modified from the original version to extract relevant information only.
*/

clear all

* Read the raw HEGIS records using the layout given in the HEGIS.dct dictionary.
infile using HEGIS.dct

* Exclude records flagged as chiefly post-baccalaureate (reclass 4) and
* female-only records (sex 2).
drop if reclass == 4 | sex == 2

* two is 1 for hi codes 3 and 9; four is its complement, so every other
* record (including one with a missing hi) is counted as four.
gen two = inlist(hi, 3, 9)
gen four = !two

* Enforce public classification: blank out pubin wherever public is 0.
* NOTE(review): pubin is presumably the tuition figure for public schools
* (it feeds the "Mean Tuition" variables built later) -- confirm against HEGIS.dct.
replace pubin = . if public == 0

* Indicators for public two-year and public four-year records.
gen pub2 = two  & public == 1
gen pub4 = four & public == 1

* For each public college type (2 = two-year, 4 = four-year) build the
* enrollment-weighted county mean of pubin:
*     avgpub#_e = sum(pub# * enroll * pubin) / sum(pub# * enroll)
* computed within (sfips, cfips). Counties whose weight total is zero get 0.
* NOTE(review): a record with missing pubin drops out of the numerator but its
* enrollment still counts in the denominator, which pulls the mean toward 0.
* Left as is to keep the original results -- confirm this is intended.
tempvar wgt wgt_fee cnty_wgt cnty_fee
foreach yrs of numlist 2 4 {
	gen `wgt' = pub`yrs' * enroll
	gen `wgt_fee' = pub`yrs' * enroll * pubin
	egen `cnty_wgt' = total(`wgt'), by(sfips cfips)
	egen `cnty_fee' = total(`wgt_fee'), by(sfips cfips)
	gen avgpub`yrs'_e = cond(`cnty_wgt' == 0, 0, `cnty_fee' / `cnty_wgt')
	* Scratch variables are rebuilt from zero on the next pass.
	drop `wgt' `wgt_fee' `cnty_wgt' `cnty_fee'
}

* Summary statistics of the inputs, separately for each public college type.
foreach yrs of numlist 2 4 {
	tabstat public enroll pubin if pub`yrs' == 1, stat(mean sd min q max N)
}

* Aggregate to one row per county (sfips, cfips): the pub# flags become
* "any such record in the county" via max; the avgpub#_e values are already
* constant within a county, so mean simply carries them through.
collapse (max) pub2 pub4 (mean) avgpub2_e avgpub4_e, by(sfips cfips)

* Marker for rows that come from the HEGIS side of the merge below.
gen all = 1
sort sfips cfips
save temp, replace

* Load the county list described by FIPS.dct and attach the HEGIS aggregates.
* Old-style merge syntax: both datasets must be sorted on the key variables.
clear
infile using FIPS.dct
sort sfips cfips
merge sfips cfips using temp
drop _merge

* Rows with no HEGIS aggregate come out of the merge as system missing;
* set those to 0.
* NOTE(review): rows present only in temp keep all = 1 even though they did
* not match a row from FIPS.dct -- confirm that matches the intended meaning.
foreach v of varlist all pub2 pub4 avgpub2_e avgpub4_e {
	replace `v' = 0 if `v' == .
}

*ASSIGN NLSY FIPS territory codes
* Every rule below is keyed on a snapshot of the incoming state code, not on
* sfips itself. Recoding sfips in place let the rules chain: rows recoded
* "64" -> "66" were then picked up again by the "66" -> "75" rule and ended
* up as "75", indistinguishable from the rows that started as "66".
tempvar src_sfips
gen `src_sfips' = sfips
replace sfips = "60" if `src_sfips' == "62"
replace sfips = "61" if `src_sfips' == "63"
replace sfips = "66" if `src_sfips' == "64"
replace sfips = "72" if `src_sfips' == "65"
replace sfips = "75" if `src_sfips' == "66"
replace sfips = "78" if `src_sfips' == "67"
drop `src_sfips'

* All rows whose (recoded) state code falls in "60".."78" share one county
* code. sfips is a string, so this is a lexicographic comparison.
replace cfips = "001" if sfips >= "60" & sfips <= "78"

*ASSIGN NLSY FIPS codes to Fairbanks North Star
replace cfips = "818" if state=="AK" & cfips=="185"

* Descriptive labels for the county-level variables and for the dataset.
label variable state     "State Code"
label variable sfips     "State FIPS, some values were reassigned"
label variable cfips     "County FIPS, some values were reassigned"
label variable nfips     "County Name--FIPS"
label variable all       "Value = 1 if matched to US state and county data"
label variable pub2      "Public 2 Year College in County, CV based on HEGIS data"
label variable pub4      "Public 4 Year College in County, CV based on HEGIS data"
label variable avgpub2_e "County Mean Tuition (enrolllment weighted)--Public 2 Yr, CV based on HEGIS data"
label variable avgpub4_e "County Mean Tuition (enrolllment weighted)--Public 4 Yr, CV based on HEGIS data"
*tab all
label data "Cnty-level data aggr. fr 1977 HEGIS Data"

* Numeric copies of the string FIPS codes. Only state codes up to 56 are
* kept, which removes the rows with higher (territory) codes.
destring sfips, gen(statefip)
keep if statefip<=56
destring cfips, gen(countyfip)

* Shrink storage types, then write the county-level file.
compress
save HGISCNTY_r,replace

* Report on the file that was actually written. The message previously named
* HGISCNTY.dta, which this script never creates.
display "County-level dataset HGISCNTY_r.dta, aggregated from HEGIS School data"
describe using HGISCNTY_r
